/*******************************************************************************
Figure2.do

This file looks at population densities for Walgreens and Rite Aid pharmacy 
locations, and plots the cumulative number of establishments in locations with
lesser or equal population density. We create these cumulative histograms by 
binning log population density and computing a running sum of the number of 
establishments in each bin or below.

Last updated: 01/10/2020
*******************************************************************************/

version 15
cd "C:\Plants_in_Space"
set more off
set type double
set varabbrev off
graph set window fontface "Times New Roman" 

// Keep Walgreens and Rite Aid pharmacy locations 
use "Data\Intermediate\NETS\NETS_HQs_at_least_5_employees_2014_cleaned.dta", clear
keep if sic4 == 5912 & (hq == 14578892 | hq == 8965063)
gen corp = 0
gen name = ""
replace corp = 1 if hq == 8965063
replace name = "Walgreens" if hq == 8965063
replace corp = 2 if hq == 14578892
replace name = "Rite Aid" if hq == 14578892
labmask corp, values(name)

// Merge in data on population density
merge m:1 ID_6 using "Data\Intermediate\grid_population_and_employment_data\population_and_employment_data_M6.dta", keep(3) keepusing(pop_density_6) nogen
gen log_pop_density = log(pop_density_6)
replace log_pop_density = 0 if missing(log_pop_density)
label variable log_pop_density "Population Density"

// Divide log-space of population density into bins of equal width, then assign
// establishments to bins based on density of their locations. Next, count the
// number of establishments in each bin for Walgreens and Rite Aid separately.
local width = 0.25
qui sum log_pop_density
qui return list
local min = r(min)
local max = r(max)
local range = `max'-`min'
local num_bins = ceil(`range' / `width')

gen bin = 0
forvalues i = 1/`num_bins' {
	local j = `i' - 1
	replace bin = `i' if (log_pop_density >= `min' + `width' * `j' - 1e-9) & (log_pop_density < `min' + `width' * `i')
}
assert bin > 0
by corp bin, sort: gen num_plants = _N
by corp bin, sort: keep if _n == 1

// Fill in "missing" bins (bins where the number of establishments is zero)
forvalues b = 1/`num_bins' {
	forvalues c = 1/2 {
		qui count if bin == `b' & corp == `c'
		if r(N) == 0 {
			set obs `=_N+1'
			replace bin = `b' in `=_N'
			replace corp = `c' in `=_N'
			replace num_plants = 0 in `=_N'
		}
	}
}

// Compute a running sum of the number of establishments by bin for each
// corporation, then create a histogram to show the cumulative the number of 
// establishments
sort corp bin
by corp, sort: gen cum_num_plants = sum(num_plants)
gen ticks = `min' + (bin - 1) * `width'   // placement of bins in log-space
twoway (bar cum_num_plants ticks if corp==1, barwidth(`width') lcolor(gs0) fcolor(none) lwidth(thin)) ///
	   (bar cum_num_plants ticks if corp==2, barwidth(`width') lcolor(gs0%25) fcolor(gs0%35) lwidth(thin)), ///
		xtitle("Population Density", size(medlarge)) ytitle("Cumulative Number of Plants", size(medlarge)) ///
		ylabel(, labsize(medlarge) nogrid) ///
		xlabel(0 "10{superscript:0}" 2.3025851 "10{superscript:1}" 4.6051702 "10{superscript:2}" 6.9077553 "10{superscript:3}" 9.2103404 "10{superscript:4}", labsize(medlarge)) ///
		legend(label(1 "Walgreens") label(2 "Rite Aid") size(medlarge) symx(*0.5) symy(*0.8) cols(1) ring(0) bplacement(nw)) ///
		plotregion(margin(l=0 b=0) fcolor(white)) graphregion(color(white)) bgcolor(white)
graph export "Plots\Figure2.png", width(1000) replace

